This notebook is the second part of an analysis that demonstrate samples in the PBTA cluster by cancer histology using dimensionality reduction techniques, Principal Component Analysis (PCA), t-Distributed Stochastic Neighbor Embedding (t-SNE), and Uniform Manifold Approximation and Projection (UMAP).

This notebook focuses on plotting the dimension reduction score data.

It addresses issue #9 in the Open-PBTA analysis repository.

Summary of Findings:

Upon plotting the dimension reduction scores produced by each of the three unique techniques mentioned above, it was noticed that there may be batch effects present.

  • The RNA_library variable within the metadata may be useful in correcting said batch effects as suggested by the selection strategy plots for both sets of expression data. That being said, we would want to repeat analyses with the data separated by the selection strategy method applied.
  • When plotting the data points colored by disease_type_new, it seemed that they are too many distinct values (cancer types) in this variable to be informative in the plots below. The variable broad_histology was used to replace disease_type_new for this reason.

Output Files:

  • analyses/transcriptomic-dimension-reduction/plots/meta_grid_kallisto_histology.pdf
  • analyses/transcriptomic-dimension-reduction/plots/meta_grid_kallisto_strategy.pdf
  • analyses/transcriptomic-dimension-reduction/plots/meta_grid_polyA_kallisto_histology.pdf
  • analyses/transcriptomic-dimension-reduction/plots/meta_grid_rsem_histology.pdf
  • analyses/transcriptomic-dimension-reduction/plots/meta_grid_rsem_strategy.pdf
  • analyses/transcriptomic-dimension-reduction/plots/meta_grid_polyA_rsem_histology.pdf
  • analyses/transcriptomic-dimension-reduction/plots/meta_grid_stranded_kallisto_histology.pdf
  • analyses/transcriptomic-dimension-reduction/plots/meta_grid_stranded_rsem_histology.pdf # Usage

This script is intended to be run via the command line from the top directory of the repository as follows:

Rscript -e "rmarkdown::render('analyses/transcriptomic-dimension-reduction/02-transcriptomic-analysis-plotting.Rmd', clean = TRUE)"

Set Up

Assign functions.

# Function to plot
plot_dimension_reduction <-
  function(aligned_scores_df, point_color, score1, score2) {
    # Given a data.frame that contains the scores of a dimension reduction
    # analysis and the information we want from the metadata, make a scatterplot.
    #
    # Args:
    #   aligned_scores_df: data.frame containing dimension reduction scores,
    #                           and relative information from the metadata
    #   point_color: the variable whose information will be used to color
    #                the points on the plot
    #   score1: the column number of the first dimension reduction score that we
    #           want to plot
    #   score2: the column number of the second dimension reduction score that
    #           we want to plot
    #
    # Returns:
    #   dimension_reduction_plot: the plot representing the dimension reduction
    #                             scores in the given data.frame, using the values
    #                             in `point_color` as symbols to color the points

    # transform the strings in `point_color` into symbols for plotting
    color_sym <- rlang::sym(point_color)

    dimension_reduction_plot <- ggplot2::ggplot(
      aligned_scores_df,
      ggplot2::aes(
        x = dplyr::pull(aligned_scores_df, score1),
        y = dplyr::pull(aligned_scores_df, score2),
        color = !!color_sym
      )
    ) +
      ggplot2::geom_point(alpha = 0.3) +
      ggplot2::scale_color_manual(values = (c(
        "#2b3fff", "#ec102f", "#235e31",
        "#1a6587", "#11e38c", "#a22f80",
        "#fe5900", "#1945c5", "#51f310",
        "#8b20d3", "#799d10", "#881c23",
        "#3fc6f8", "#fe5cde", "#0a7fb2",
        "#f2945a", "#6b4472", "#f4d403",
        "#76480d", "#a6b6f9"
      )))

    return(dimension_reduction_plot)
  }

Assign name of the output directories.

# Assign names of output directories
results_dir <- "results"
plots_dir <- "plots"

# Create directory to hold the output.
if (!dir.exists(plots_dir)) {
  dir.create(plots_dir)
}

Read in the tsv files containing the dimension reduction scores aligned with the metadata, which were produced in 01-transcriptomic-analysis-prep.R.

### RSEM -----------------------------------------------------------------------
polyA_rsem_pca_aligned_df <-
  readr::read_tsv(file.path(results_dir, "polyA_rsem_pca_scores_aligned.tsv"))

stranded_rsem_pca_aligned_df <-
  readr::read_tsv(file.path(results_dir, "stranded_rsem_pca_scores_aligned.tsv"))

rsem_pca_aligned_df <-
  readr::read_tsv(file.path(results_dir, "rsem_pca_scores_aligned.tsv"))

polyA_rsem_tsne_aligned_df <-
  readr::read_tsv(file.path(results_dir, "polyA_rsem_tsne_scores_aligned.tsv"))

stranded_rsem_tsne_aligned_df <-
  readr::read_tsv(file.path(results_dir, "stranded_rsem_tsne_scores_aligned.tsv"))

rsem_tsne_aligned_df <-
  readr::read_tsv(file.path(results_dir, "rsem_tsne_scores_aligned.tsv"))

polyA_rsem_umap_aligned_df <-
  readr::read_tsv(file.path(results_dir, "polyA_rsem_umap_scores_aligned.tsv"))

stranded_rsem_umap_aligned_df <-
  readr::read_tsv(file.path(results_dir, "stranded_rsem_umap_scores_aligned.tsv"))

rsem_umap_aligned_df <-
  readr::read_tsv(file.path(results_dir, "rsem_umap_scores_aligned.tsv"))

### Kallisto -------------------------------------------------------------------
polyA_kallisto_pca_aligned_df <-
  readr::read_tsv(file.path(results_dir, "polyA_kallisto_pca_scores_aligned.tsv"))

stranded_kallisto_pca_aligned_df <-
  readr::read_tsv(file.path(results_dir, "stranded_kallisto_pca_scores_aligned.tsv"))

kallisto_pca_aligned_df <-
  readr::read_tsv(file.path(results_dir, "kallisto_pca_scores_aligned.tsv"))

polyA_kallisto_tsne_aligned_df <-
  readr::read_tsv(file.path(results_dir, "polyA_kallisto_tsne_scores_aligned.tsv"))

stranded_kallisto_tsne_aligned_df <-
  readr::read_tsv(file.path(results_dir, "stranded_kallisto_tsne_scores_aligned.tsv"))

kallisto_tsne_aligned_df <-
  readr::read_tsv(file.path(results_dir, "kallisto_tsne_scores_aligned.tsv"))

polyA_kallisto_umap_aligned_df <-
  readr::read_tsv(file.path(results_dir, "polyA_kallisto_umap_scores_aligned.tsv"))

stranded_kallisto_umap_aligned_df <-
  readr::read_tsv(file.path(results_dir, "stranded_kallisto_umap_scores_aligned.tsv"))

kallisto_umap_aligned_df <-
  readr::read_tsv(file.path(results_dir, "kallisto_umap_scores_aligned.tsv"))

Plot RSEM

Combined RSEM Plots Grid

The grid of plots below shows the dimension reductions scores for the RSEM expression data, colored by broad histology and selection strategy. These plots seem to suggest the presence of batch effects, which is likely due to the need for normalization across the two batches of RNA-Seq preparation (poly-A selection versus stranded(ribosomal depletion)). This can be further determined using the RNA_library column of the metadata to attempt to correct batch effects as suggested by the selection strategy grid of plots.

# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `broad_histology` to color points
rsem_pca_plot_histology <-
  plot_dimension_reduction(rsem_pca_aligned_df, "broad_histology", 1, 2)
rsem_tsne_plot_histology <-
  plot_dimension_reduction(rsem_tsne_aligned_df, "broad_histology", 1, 2)
rsem_umap_plot_histology <-
  plot_dimension_reduction(rsem_umap_aligned_df, "broad_histology", 1, 2)


# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `RNA_library` to color points
rsem_pca_plot_strategy <-
  plot_dimension_reduction(rsem_pca_aligned_df, "RNA_library", 1, 2)
rsem_tsne_plot_strategy <-
  plot_dimension_reduction(rsem_tsne_aligned_df, "RNA_library", 1, 2)
rsem_umap_plot_strategy <-
  plot_dimension_reduction(rsem_umap_aligned_df, "RNA_library", 1, 2)

# Extract the legend from one of the broad histology plots
legend_a <- cowplot::get_legend(
  rsem_umap_plot_histology +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.position = c(0.05, 0.40),
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 15)
    )
)

# Extract the legend from one of the selection strategy plots
legend_b <- cowplot::get_legend(
  rsem_umap_plot_strategy +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 15)
    )
)

# Plot grid with RSEM data colored by broad histology
rsem_grid_histology <- cowplot::plot_grid(
  rsem_pca_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC1", y = "PC2") +
    ggplot2::ggtitle("Broad Histology (RSEM)"),
  rsem_tsne_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  rsem_umap_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 7
)

# Plot grid with RSEM data colored by selection strategy
rsem_grid_strategy <- cowplot::plot_grid(
  rsem_pca_plot_strategy + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC1", y = "PC2") +
    ggplot2::ggtitle("Selection Strategy (RSEM)"),
  rsem_tsne_plot_strategy + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  rsem_umap_plot_strategy + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 5
)

# Add the appropriate legends to the cowplots
rsem_grid_histology <-
  cowplot::plot_grid(rsem_grid_histology, legend_a, rel_heights = c(2, .2))

rsem_grid_strategy <-
  cowplot::plot_grid(rsem_grid_strategy, legend_b, rel_heights = c(2, 2))

# Save broad histology plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_rsem_histology.pdf"),
  rsem_grid_histology,
  width = 12,
  height = 15
)

# Save selection strategy plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_rsem_strategy.pdf"),
  rsem_grid_strategy,
  width = 12,
  height = 15
)

# Display the plot grid across broad histologies
rsem_grid_histology

# Display the plot grid across selection strategy
rsem_grid_strategy

Poly-A RSEM Plots Grid

The grid of plots below represents the RSEM data, filtered for the poly-A selection strategy, with data points colored by broad histology.

# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `broad_histology` to color points
polyA_rsem_pca_plot_histology <-
  plot_dimension_reduction(polyA_rsem_pca_aligned_df, "broad_histology", 1, 7)
polyA_rsem_tsne_plot_histology <-
  plot_dimension_reduction(polyA_rsem_tsne_aligned_df, "broad_histology", 1, 2)
polyA_rsem_umap_plot_histology <-
  plot_dimension_reduction(polyA_rsem_umap_aligned_df, "broad_histology", 1, 2)

# Plot grid with RSEM data colored by broad histology
polyA_rsem_grid_histology <- cowplot::plot_grid(
  polyA_rsem_pca_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC1", y = "PC7") +
    ggplot2::ggtitle("Broad Histology (RSEM - poly-A)"),
  polyA_rsem_tsne_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  polyA_rsem_umap_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 6
)

# Extract the legend from one of the plots
legend_a <- cowplot::get_legend(
  polyA_rsem_umap_plot_histology +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.position = c(0.001, 0.45),
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 15)
    )
)

# Add the appropriate legend to the cowplot
polyA_rsem_grid_histology <-
  cowplot::plot_grid(polyA_rsem_grid_histology, legend_a, rel_heights = c(2, .2))

# Save plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_polyA_rsem_histology.pdf"),
  polyA_rsem_grid_histology,
  width = 12,
  height = 15
)
# Display the plot grid across broad histologies
polyA_rsem_grid_histology

Stranded RSEM Plots Grid

The grid of plots below represents the RSEM data, filtered for the stranded selection strategy, with data points colored by broad histology.

# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `broad_histology` to color points
stranded_rsem_pca_plot_histology <-
  plot_dimension_reduction(stranded_rsem_pca_aligned_df, "broad_histology", 2, 10)
stranded_rsem_tsne_plot_histology <-
  plot_dimension_reduction(stranded_rsem_tsne_aligned_df, "broad_histology", 1, 2)
stranded_rsem_umap_plot_histology <-
  plot_dimension_reduction(stranded_rsem_umap_aligned_df, "broad_histology", 1, 2)

# Extract the legend from one of the plots
legend_b <- cowplot::get_legend(
  stranded_rsem_umap_plot_histology +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.position = c(0.05, 0.35),
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 9)
    )
)

# Plot grid with RSEM data colored by broad histology
stranded_rsem_grid_histology <- cowplot::plot_grid(
  stranded_rsem_pca_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC2", y = "PC10") +
    ggplot2::ggtitle("Broad Histology (RSEM - Stranded)"),
  stranded_rsem_tsne_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  stranded_rsem_umap_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 6
)

# Add the appropriate legend to the cowplot
stranded_rsem_grid_histology <-
  cowplot::plot_grid(stranded_rsem_grid_histology, legend_b, rel_heights = c(2, .2))

# Save plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_stranded_rsem_histology.pdf"),
  stranded_rsem_grid_histology,
  width = 12,
  height = 15
)
# Display the plot grid across broad histologies
stranded_rsem_grid_histology

Plot Kallisto

Combined Kallisto Plots Grid

The grid of plots below shows the dimension reductions scores for the Kallisto expression data, colored by the broad histology and selection strategy. These plots seem to suggest the presence of batch effects, which is likely due to the need for normalization across the two batches of RNA-Seq preparation (poly-A selection versus stranded(ribosomal depletion)). This can be further determined using the RNA_library column of the metadata to attempt to correct batch effects as suggested by the selection strategy grid of plots.

# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `broad_histology` to color points
kallisto_pca_plot_histology <-
  plot_dimension_reduction(kallisto_pca_aligned_df, "broad_histology", 1, 2)
kallisto_tsne_plot_histology <-
  plot_dimension_reduction(kallisto_tsne_aligned_df, "broad_histology", 1, 2)
kallisto_umap_plot_histology <-
  plot_dimension_reduction(kallisto_umap_aligned_df, "broad_histology", 1, 2)

# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `RNA_library` to color points
kallisto_pca_plot_strategy <-
  plot_dimension_reduction(kallisto_pca_aligned_df, "RNA_library", 1, 2)
kallisto_tsne_plot_strategy <-
  plot_dimension_reduction(kallisto_tsne_aligned_df, "RNA_library", 1, 2)
kallisto_umap_plot_strategy <-
  plot_dimension_reduction(kallisto_umap_aligned_df, "RNA_library", 1, 2)

# Extract the legend from one of the broad histology plots
legend_a <- cowplot::get_legend(
  kallisto_umap_plot_histology +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.position = c(0.05, 0.40),
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 9)
    )
)

# Extract the legend from one of the selection strategy plots
legend_b <- cowplot::get_legend(
  kallisto_umap_plot_strategy +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 15)
    )
)

# Plot grid with Kallisto data colored by broad histology
kallisto_grid_histology <- cowplot::plot_grid(
  kallisto_pca_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC1", y = "PC2") +
    ggplot2::ggtitle("Broad Histology (Kallisto)"),
  kallisto_tsne_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  kallisto_umap_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 7
)

# Plot grid with Kallisto data colored by selection strategy
kallisto_grid_strategy <- cowplot::plot_grid(
  kallisto_pca_plot_strategy + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC1", y = "PC2") +
    ggplot2::ggtitle("Selection Strategy (Kallisto)"),
  kallisto_tsne_plot_strategy + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  kallisto_umap_plot_strategy + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 5
)

# Add the appropriate legends to the cowplots
kallisto_grid_histology <-
  cowplot::plot_grid(kallisto_grid_histology, legend_a, rel_heights = c(2, .2))

kallisto_grid_strategy <-
  cowplot::plot_grid(kallisto_grid_strategy, legend_b, rel_heights = c(2, 2))

# Save broad histology plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_kallisto_histology.pdf"),
  kallisto_grid_histology,
  width = 12,
  height = 15
)

# Save selection strategy plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_kallisto_strategy.pdf"),
  kallisto_grid_strategy,
  width = 12,
  height = 15
)

# Display the plot grid across broad histologies
kallisto_grid_histology

# Display the plot grid across selection strategy
kallisto_grid_strategy

Poly-A Kallisto Plots Grid

The grid of plots below represents the Kallisto data filtered for the poly-A selection strategy, with data points colored by broad histology.

# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `broad_histology` to color points
polyA_kallisto_pca_plot_histology <-
  plot_dimension_reduction(polyA_kallisto_pca_aligned_df, "broad_histology", 1, 7)
polyA_kallisto_tsne_plot_histology <-
  plot_dimension_reduction(polyA_kallisto_tsne_aligned_df, "broad_histology", 1, 2)
polyA_kallisto_umap_plot_histology <-
  plot_dimension_reduction(polyA_kallisto_umap_aligned_df, "broad_histology", 1, 2)

# Plot grid with Kallisto data colored by cancer histology
polyA_kallisto_grid_histology <- cowplot::plot_grid(
  polyA_kallisto_pca_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC1", y = "PC7") +
    ggplot2::ggtitle("Broad Histology (Kallisto - Poly-A)"),
  polyA_kallisto_tsne_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  polyA_kallisto_umap_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 7
)

# Extract the legend from one of the plots
legend_a <- cowplot::get_legend(
  polyA_kallisto_umap_plot_histology +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.position = c(0.001, 0.45),
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 15)
    )
)

# Add the appropriate legend to the cowplot
polyA_kallisto_grid_histology <-
  cowplot::plot_grid(polyA_kallisto_grid_histology, legend_a, rel_heights = c(2, .2))

# Save plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_polyA_kallisto_histology.pdf"),
  polyA_kallisto_grid_histology,
  width = 12,
  height = 15
)

# Display the plot grid across broad histologies
polyA_kallisto_grid_histology

Stranded Kallisto Plots Grid

The grid of plots below represents the Kallisto data filtered for the stranded selection strategy, with data points colored by broad histology.

# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `broad_histology` to color points
stranded_kallisto_pca_plot_histology <-
  plot_dimension_reduction(stranded_kallisto_pca_aligned_df, "broad_histology", 4, 7)
stranded_kallisto_tsne_plot_histology <-
  plot_dimension_reduction(stranded_kallisto_tsne_aligned_df, "broad_histology", 1, 2)
stranded_kallisto_umap_plot_histology <-
  plot_dimension_reduction(stranded_kallisto_umap_aligned_df, "broad_histology", 1, 2)

# Extract the legend from one of the plots
legend_b <- cowplot::get_legend(
  stranded_kallisto_umap_plot_histology +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.position = c(0.05, 0.35),
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 9)
    )
)

# Plot grid with Kallisto data colored by broad histology
stranded_kallisto_grid_histology <- cowplot::plot_grid(
  stranded_kallisto_pca_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC4", y = "PC7") +
    ggplot2::ggtitle("Broad Histology (Kallisto - Stranded)"),
  stranded_kallisto_tsne_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  stranded_kallisto_umap_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 7
)

# Add the appropriate legend to the cowplot
stranded_kallisto_grid_histology <-
  cowplot::plot_grid(stranded_kallisto_grid_histology, legend_b, rel_heights = c(2, .2))

# Save plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_stranded_kallisto_histology.pdf"),
  stranded_kallisto_grid_histology,
  width = 12,
  height = 15
)

# Display the plot grid across broad histologies
stranded_kallisto_grid_histology

Session Info

sessionInfo()
R version 3.6.0 (2019-04-26)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Debian GNU/Linux 9 (stretch)

Matrix products: default
BLAS/LAPACK: /usr/lib/libopenblasp-r0.2.19.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
 [3] LC_TIME=en_US.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=en_US.UTF-8    LC_MESSAGES=C             
 [7] LC_PAPER=en_US.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C            
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

loaded via a namespace (and not attached):
 [1] Rcpp_1.0.1       knitr_1.23       magrittr_1.5     hms_0.4.2       
 [5] cowplot_0.9.4    tidyselect_0.2.5 munsell_0.5.0    colorspace_1.4-1
 [9] R6_2.4.0         rlang_0.4.0      dplyr_0.8.3      stringr_1.4.0   
[13] tools_3.6.0      grid_3.6.0       gtable_0.3.0     xfun_0.8        
[17] htmltools_0.3.6  assertthat_0.2.1 lazyeval_0.2.2   yaml_2.2.0      
[21] digest_0.6.20    tibble_2.1.3     crayon_1.3.4     purrr_0.3.2     
[25] readr_1.3.1      ggplot2_3.2.0    base64enc_0.1-3  glue_1.3.1      
[29] evaluate_0.14    rmarkdown_1.13   labeling_0.3     stringi_1.4.3   
[33] compiler_3.6.0   pillar_1.4.2     scales_1.0.0     jsonlite_1.6    
[37] pkgconfig_2.0.2 
---
title: "Unsupervised Analysis of Transcriptomic Differences - Plotting"
author: Chante Bethell for CCDL 2019
output:
  html_notebook:
    toc: true
    toc_float: true
---

This notebook is the second part of an analysis that demonstrate samples in the 
PBTA cluster by cancer histology using dimensionality reduction techniques,
[Principal Component Analysis](https://www.rdocumentation.org/packages/stats/versions/3.6.0/topics/prcomp) (PCA), 
[t-Distributed Stochastic Neighbor Embedding](https://cran.r-project.org/web/packages/Rtsne/Rtsne.pdf) (t-SNE), 
and [Uniform Manifold Approximation and Projection](https://cran.r-project.org/web/packages/umap/vignettes/umap.html) (UMAP).

This notebook focuses on plotting the dimension reduction score data. 

It addresses [issue #9](https://github.com/AlexsLemonade/OpenPBTA-analysis/issues/9)
in the Open-PBTA analysis repository. 

## Summary of Findings:

Upon plotting the dimension reduction scores produced by each of the three 
unique techniques mentioned above, it was noticed that there may be batch 
effects present.

- The `RNA_library` variable within the metadata may be useful in 
  correcting said batch effects as suggested by the selection strategy plots 
  for both sets of expression data. That being said, we would want to repeat 
  analyses with the data separated by the selection strategy method applied. 
- When plotting the data points colored by `disease_type_new`, it seemed that
  they are too many distinct values (cancer types) in this variable to be
  informative in the plots below. The variable `broad_histology` was used to 
  replace `disease_type_new` for this reason. 

## Output Files:
- `analyses/transcriptomic-dimension-reduction/plots/meta_grid_kallisto_histology.pdf`
- `analyses/transcriptomic-dimension-reduction/plots/meta_grid_kallisto_strategy.pdf`
- `analyses/transcriptomic-dimension-reduction/plots/meta_grid_polyA_kallisto_histology.pdf`
- `analyses/transcriptomic-dimension-reduction/plots/meta_grid_rsem_histology.pdf`
- `analyses/transcriptomic-dimension-reduction/plots/meta_grid_rsem_strategy.pdf`
- `analyses/transcriptomic-dimension-reduction/plots/meta_grid_polyA_rsem_histology.pdf`
- `analyses/transcriptomic-dimension-reduction/plots/meta_grid_stranded_kallisto_histology.pdf`
- `analyses/transcriptomic-dimension-reduction/plots/meta_grid_stranded_rsem_histology.pdf`
# Usage

This script is intended to be run via the command line from the top directory of the repository as follows: 
```
Rscript -e "rmarkdown::render('analyses/transcriptomic-dimension-reduction/02-transcriptomic-analysis-plotting.Rmd', clean = TRUE)"
```

# Set Up

Assign functions. 
```{r}
# Function to plot
plot_dimension_reduction <-
  function(aligned_scores_df, point_color, score1, score2) {
    # Given a data.frame that contains the scores of a dimension reduction
    # analysis and the information we want from the metadata, make a scatterplot.
    #
    # Args:
    #   aligned_scores_df: data.frame containing dimension reduction scores,
    #                           and relative information from the metadata
    #   point_color: the variable whose information will be used to color
    #                the points on the plot
    #   score1: the column number of the first dimension reduction score that we
    #           want to plot
    #   score2: the column number of the second dimension reduction score that
    #           we want to plot
    #
    # Returns:
    #   dimension_reduction_plot: the plot representing the dimension reduction
    #                             scores in the given data.frame, using the values
    #                             in `point_color` as symbols to color the points

    # transform the strings in `point_color` into symbols for plotting
    color_sym <- rlang::sym(point_color)

    dimension_reduction_plot <- ggplot2::ggplot(
      aligned_scores_df,
      ggplot2::aes(
        x = dplyr::pull(aligned_scores_df, score1),
        y = dplyr::pull(aligned_scores_df, score2),
        color = !!color_sym
      )
    ) +
      ggplot2::geom_point(alpha = 0.3) +
      ggplot2::scale_color_manual(values = (c(
        "#2b3fff", "#ec102f", "#235e31",
        "#1a6587", "#11e38c", "#a22f80",
        "#fe5900", "#1945c5", "#51f310",
        "#8b20d3", "#799d10", "#881c23",
        "#3fc6f8", "#fe5cde", "#0a7fb2",
        "#f2945a", "#6b4472", "#f4d403",
        "#76480d", "#a6b6f9"
      )))

    return(dimension_reduction_plot)
  }
```

Assign name of the output directories.
```{r}
# Assign names of output directories
results_dir <- "results"
plots_dir <- "plots"

# Create directory to hold the output.
if (!dir.exists(plots_dir)) {
  dir.create(plots_dir)
}
```

Read in the tsv files containing the dimension reduction scores aligned with 
the metadata, which were produced in `01-transcriptomic-analysis-prep.R`.
```{r, message = FALSE}
### RSEM -----------------------------------------------------------------------
polyA_rsem_pca_aligned_df <-
  readr::read_tsv(file.path(results_dir, "polyA_rsem_pca_scores_aligned.tsv"))

stranded_rsem_pca_aligned_df <-
  readr::read_tsv(file.path(results_dir, "stranded_rsem_pca_scores_aligned.tsv"))

rsem_pca_aligned_df <-
  readr::read_tsv(file.path(results_dir, "rsem_pca_scores_aligned.tsv"))

polyA_rsem_tsne_aligned_df <-
  readr::read_tsv(file.path(results_dir, "polyA_rsem_tsne_scores_aligned.tsv"))

stranded_rsem_tsne_aligned_df <-
  readr::read_tsv(file.path(results_dir, "stranded_rsem_tsne_scores_aligned.tsv"))

rsem_tsne_aligned_df <-
  readr::read_tsv(file.path(results_dir, "rsem_tsne_scores_aligned.tsv"))

polyA_rsem_umap_aligned_df <-
  readr::read_tsv(file.path(results_dir, "polyA_rsem_umap_scores_aligned.tsv"))

stranded_rsem_umap_aligned_df <-
  readr::read_tsv(file.path(results_dir, "stranded_rsem_umap_scores_aligned.tsv"))

rsem_umap_aligned_df <-
  readr::read_tsv(file.path(results_dir, "rsem_umap_scores_aligned.tsv"))

### Kallisto -------------------------------------------------------------------
polyA_kallisto_pca_aligned_df <-
  readr::read_tsv(file.path(results_dir, "polyA_kallisto_pca_scores_aligned.tsv"))

stranded_kallisto_pca_aligned_df <-
  readr::read_tsv(file.path(results_dir, "stranded_kallisto_pca_scores_aligned.tsv"))

kallisto_pca_aligned_df <-
  readr::read_tsv(file.path(results_dir, "kallisto_pca_scores_aligned.tsv"))

polyA_kallisto_tsne_aligned_df <-
  readr::read_tsv(file.path(results_dir, "polyA_kallisto_tsne_scores_aligned.tsv"))

stranded_kallisto_tsne_aligned_df <-
  readr::read_tsv(file.path(results_dir, "stranded_kallisto_tsne_scores_aligned.tsv"))

kallisto_tsne_aligned_df <-
  readr::read_tsv(file.path(results_dir, "kallisto_tsne_scores_aligned.tsv"))

polyA_kallisto_umap_aligned_df <-
  readr::read_tsv(file.path(results_dir, "polyA_kallisto_umap_scores_aligned.tsv"))

stranded_kallisto_umap_aligned_df <-
  readr::read_tsv(file.path(results_dir, "stranded_kallisto_umap_scores_aligned.tsv"))

kallisto_umap_aligned_df <-
  readr::read_tsv(file.path(results_dir, "kallisto_umap_scores_aligned.tsv"))
```


# Plot RSEM

## Combined RSEM Plots Grid
The grid of plots below shows the dimension reductions scores for the RSEM 
expression data, colored by broad histology and selection strategy.
These plots seem to suggest the presence of batch effects, which is likely due 
to the need for normalization across the two batches of RNA-Seq preparation 
(poly-A selection versus stranded(ribosomal depletion)).
This can be further determined using the `RNA_library` column of the
metadata to attempt to correct batch effects as suggested by the selection
strategy grid of plots.

```{r, fig.width = 10, fig.height = 18}
# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `broad_histology` to color points
rsem_pca_plot_histology <-
  plot_dimension_reduction(rsem_pca_aligned_df, "broad_histology", 1, 2)
rsem_tsne_plot_histology <-
  plot_dimension_reduction(rsem_tsne_aligned_df, "broad_histology", 1, 2)
rsem_umap_plot_histology <-
  plot_dimension_reduction(rsem_umap_aligned_df, "broad_histology", 1, 2)


# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `RNA_library` to color points
rsem_pca_plot_strategy <-
  plot_dimension_reduction(rsem_pca_aligned_df, "RNA_library", 1, 2)
rsem_tsne_plot_strategy <-
  plot_dimension_reduction(rsem_tsne_aligned_df, "RNA_library", 1, 2)
rsem_umap_plot_strategy <-
  plot_dimension_reduction(rsem_umap_aligned_df, "RNA_library", 1, 2)

# Extract the legend from one of the broad histology plots
legend_a <- cowplot::get_legend(
  rsem_umap_plot_histology +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.position = c(0.05, 0.40),
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 15)
    )
)

# Extract the legend from one of the selection strategy plots
legend_b <- cowplot::get_legend(
  rsem_umap_plot_strategy +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 15)
    )
)

# Plot grid with RSEM data colored by broad histology
rsem_grid_histology <- cowplot::plot_grid(
  rsem_pca_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC1", y = "PC2") +
    ggplot2::ggtitle("Broad Histology (RSEM)"),
  rsem_tsne_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  rsem_umap_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 7
)

# Plot grid with RSEM data colored by selection strategy
rsem_grid_strategy <- cowplot::plot_grid(
  rsem_pca_plot_strategy + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC1", y = "PC2") +
    ggplot2::ggtitle("Selection Strategy (RSEM)"),
  rsem_tsne_plot_strategy + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  rsem_umap_plot_strategy + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 5
)

# Add the appropriate legends to the cowplots
rsem_grid_histology <-
  cowplot::plot_grid(rsem_grid_histology, legend_a, rel_heights = c(2, .2))

rsem_grid_strategy <-
  cowplot::plot_grid(rsem_grid_strategy, legend_b, rel_heights = c(2, 2))

# Save broad histology plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_rsem_histology.pdf"),
  rsem_grid_histology,
  width = 12,
  height = 15
)

# Save selection strategy plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_rsem_strategy.pdf"),
  rsem_grid_strategy,
  width = 12,
  height = 15
)

# Display the plot grid across broad histologies
rsem_grid_histology

# Display the plot grid across selection strategy
rsem_grid_strategy
```
## Poly-A RSEM Plots Grid 
The grid of plots below represents the RSEM data, filtered for the `poly-A`
selection strategy, with data points colored by broad histology. 

```{r, fig.width = 10, fig.height = 18}
# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `broad_histology` to color points
polyA_rsem_pca_plot_histology <-
  plot_dimension_reduction(polyA_rsem_pca_aligned_df, "broad_histology", 1, 7)
polyA_rsem_tsne_plot_histology <-
  plot_dimension_reduction(polyA_rsem_tsne_aligned_df, "broad_histology", 1, 2)
polyA_rsem_umap_plot_histology <-
  plot_dimension_reduction(polyA_rsem_umap_aligned_df, "broad_histology", 1, 2)

# Plot grid with RSEM data colored by broad histology
polyA_rsem_grid_histology <- cowplot::plot_grid(
  polyA_rsem_pca_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC1", y = "PC7") +
    ggplot2::ggtitle("Broad Histology (RSEM - poly-A)"),
  polyA_rsem_tsne_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  polyA_rsem_umap_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 6
)

# Extract the legend from one of the plots
legend_a <- cowplot::get_legend(
  polyA_rsem_umap_plot_histology +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.position = c(0.001, 0.45),
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 15)
    )
)

# Add the appropriate legend to the cowplot
polyA_rsem_grid_histology <-
  cowplot::plot_grid(polyA_rsem_grid_histology, legend_a, rel_heights = c(2, .2))

# Save plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_polyA_rsem_histology.pdf"),
  polyA_rsem_grid_histology,
  width = 12,
  height = 15
)
# Display the plot grid across broad histologies
polyA_rsem_grid_histology
```

## Stranded RSEM Plots Grid
The grid of plots below represents the RSEM data, filtered for the `stranded`
selection strategy, with data points colored by broad histology. 

```{r, fig.width = 10, fig.height = 18}
# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `broad_histology` to color points
stranded_rsem_pca_plot_histology <-
  plot_dimension_reduction(stranded_rsem_pca_aligned_df, "broad_histology", 2, 10)
stranded_rsem_tsne_plot_histology <-
  plot_dimension_reduction(stranded_rsem_tsne_aligned_df, "broad_histology", 1, 2)
stranded_rsem_umap_plot_histology <-
  plot_dimension_reduction(stranded_rsem_umap_aligned_df, "broad_histology", 1, 2)

# Extract the legend from one of the plots
legend_b <- cowplot::get_legend(
  stranded_rsem_umap_plot_histology +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.position = c(0.05, 0.35),
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 9)
    )
)

# Plot grid with RSEM data colored by broad histology
stranded_rsem_grid_histology <- cowplot::plot_grid(
  stranded_rsem_pca_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC2", y = "PC10") +
    ggplot2::ggtitle("Broad Histology (RSEM - Stranded)"),
  stranded_rsem_tsne_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  stranded_rsem_umap_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 6
)

# Add the appropriate legend to the cowplot
stranded_rsem_grid_histology <-
  cowplot::plot_grid(stranded_rsem_grid_histology, legend_b, rel_heights = c(2, .2))

# Save plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_stranded_rsem_histology.pdf"),
  stranded_rsem_grid_histology,
  width = 12,
  height = 15
)
# Display the plot grid across broad histologies
stranded_rsem_grid_histology
```
# Plot Kallisto

## Combined Kallisto Plots Grid
The grid of plots below shows the dimension reductions scores for the Kallisto 
expression data, colored by the broad histology and selection strategy.
These plots seem to suggest the presence of batch effects, which is likely due 
to the need for normalization across the two batches of RNA-Seq preparation 
(poly-A selection versus stranded(ribosomal depletion)).
This can be further determined using the `RNA_library` column of the
metadata to attempt to correct batch effects as suggested by the selection 
strategy grid of plots.

```{r, fig.width = 10, fig.height = 18}
# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `broad_histology` to color points
kallisto_pca_plot_histology <-
  plot_dimension_reduction(kallisto_pca_aligned_df, "broad_histology", 1, 2)
kallisto_tsne_plot_histology <-
  plot_dimension_reduction(kallisto_tsne_aligned_df, "broad_histology", 1, 2)
kallisto_umap_plot_histology <-
  plot_dimension_reduction(kallisto_umap_aligned_df, "broad_histology", 1, 2)

# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `RNA_library` to color points
kallisto_pca_plot_strategy <-
  plot_dimension_reduction(kallisto_pca_aligned_df, "RNA_library", 1, 2)
kallisto_tsne_plot_strategy <-
  plot_dimension_reduction(kallisto_tsne_aligned_df, "RNA_library", 1, 2)
kallisto_umap_plot_strategy <-
  plot_dimension_reduction(kallisto_umap_aligned_df, "RNA_library", 1, 2)

# Extract the legend from one of the broad histology plots
legend_a <- cowplot::get_legend(
  kallisto_umap_plot_histology +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.position = c(0.05, 0.40),
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 9)
    )
)

# Extract the legend from one of the selection strategy plots
legend_b <- cowplot::get_legend(
  kallisto_umap_plot_strategy +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 15)
    )
)

# Plot grid with Kallisto data colored by broad histology
kallisto_grid_histology <- cowplot::plot_grid(
  kallisto_pca_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC1", y = "PC2") +
    ggplot2::ggtitle("Broad Histology (Kallisto)"),
  kallisto_tsne_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  kallisto_umap_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 7
)

# Plot grid with Kallisto data colored by selection strategy
kallisto_grid_strategy <- cowplot::plot_grid(
  kallisto_pca_plot_strategy + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC1", y = "PC2") +
    ggplot2::ggtitle("Selection Strategy (Kallisto)"),
  kallisto_tsne_plot_strategy + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  kallisto_umap_plot_strategy + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 5
)

# Add the appropriate legends to the cowplots
kallisto_grid_histology <-
  cowplot::plot_grid(kallisto_grid_histology, legend_a, rel_heights = c(2, .2))

kallisto_grid_strategy <-
  cowplot::plot_grid(kallisto_grid_strategy, legend_b, rel_heights = c(2, 2))

# Save broad histology plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_kallisto_histology.pdf"),
  kallisto_grid_histology,
  width = 12,
  height = 15
)

# Save selection strategy plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_kallisto_strategy.pdf"),
  kallisto_grid_strategy,
  width = 12,
  height = 15
)

# Display the plot grid across broad histologies
kallisto_grid_histology

# Display the plot grid across selection strategy
kallisto_grid_strategy
```

## Poly-A Kallisto Plots Grid 
The grid of plots below represents the Kallisto data filtered for the `poly-A`
selection strategy, with data points colored by broad histology. 

```{r, fig.width = 10, fig.height = 18}
# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `broad_histology` to color points
polyA_kallisto_pca_plot_histology <-
  plot_dimension_reduction(polyA_kallisto_pca_aligned_df, "broad_histology", 1, 7)
polyA_kallisto_tsne_plot_histology <-
  plot_dimension_reduction(polyA_kallisto_tsne_aligned_df, "broad_histology", 1, 2)
polyA_kallisto_umap_plot_histology <-
  plot_dimension_reduction(polyA_kallisto_umap_aligned_df, "broad_histology", 1, 2)

# Plot grid with Kallisto data colored by cancer histology
polyA_kallisto_grid_histology <- cowplot::plot_grid(
  polyA_kallisto_pca_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC1", y = "PC7") +
    ggplot2::ggtitle("Broad Histology (Kallisto - Poly-A)"),
  polyA_kallisto_tsne_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  polyA_kallisto_umap_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 7
)

# Extract the legend from one of the plots
legend_a <- cowplot::get_legend(
  polyA_kallisto_umap_plot_histology +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.position = c(0.001, 0.45),
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 15)
    )
)

# Add the appropriate legend to the cowplot
polyA_kallisto_grid_histology <-
  cowplot::plot_grid(polyA_kallisto_grid_histology, legend_a, rel_heights = c(2, .2))

# Save plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_polyA_kallisto_histology.pdf"),
  polyA_kallisto_grid_histology,
  width = 12,
  height = 15
)

# Display the plot grid across broad histologies
polyA_kallisto_grid_histology
```

## Stranded Kallisto Plots Grid
The grid of plots below represents the Kallisto data filtered for the `stranded`
selection strategy, with data points colored by broad histology. 

```{r, fig.width = 10, fig.height = 18}
# Run the `plot_dimension_reduction` function to plot each set of dimension
# reduction scores using `broad_histology` to color points
stranded_kallisto_pca_plot_histology <-
  plot_dimension_reduction(stranded_kallisto_pca_aligned_df, "broad_histology", 4, 7)
stranded_kallisto_tsne_plot_histology <-
  plot_dimension_reduction(stranded_kallisto_tsne_aligned_df, "broad_histology", 1, 2)
stranded_kallisto_umap_plot_histology <-
  plot_dimension_reduction(stranded_kallisto_umap_aligned_df, "broad_histology", 1, 2)

# Extract the legend from one of the plots
legend_b <- cowplot::get_legend(
  stranded_kallisto_umap_plot_histology +
    ggplot2::theme(
      legend.direction = "vertical",
      legend.position = c(0.05, 0.35),
      legend.key.width = ggplot2::unit(0.1, "cm"),
      text = ggplot2::element_text(size = 9)
    )
)

# Plot grid with Kallisto data colored by broad histology
stranded_kallisto_grid_histology <- cowplot::plot_grid(
  stranded_kallisto_pca_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "PC4", y = "PC7") +
    ggplot2::ggtitle("Broad Histology (Kallisto - Stranded)"),
  stranded_kallisto_tsne_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "t-SNE1", y = "t-SNE2"),
  stranded_kallisto_umap_plot_histology + ggplot2::theme(legend.position = "none") +
    ggplot2::labs(x = "UMAP1", y = "UMAP2"),
  labels = "AUTO",
  align = "vh",
  axis = "b",
  ncol = 1,
  nrow = 7
)

# Add the appropriate legend to the cowplot
stranded_kallisto_grid_histology <-
  cowplot::plot_grid(stranded_kallisto_grid_histology, legend_b, rel_heights = c(2, .2))

# Save plot grid
ggplot2::ggsave(
  file.path("plots", "meta_grid_stranded_kallisto_histology.pdf"),
  stranded_kallisto_grid_histology,
  width = 12,
  height = 15
)

# Display the plot grid across broad histologies
stranded_kallisto_grid_histology
```
# Session Info
```{r}
sessionInfo()
```

